- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4Py Clustering KM.dsnb
executable file
·1 lines (1 loc) · 108 KB
/
OML4Py Clustering KM.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4Py Clustering KM","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":[],"enabled":true,"result":{"startTime":1713901552051,"interpreter":"md.low","endTime":1713901552210,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":0,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","## Identifying Customer Segments using K-Means Clustering","Oracle Machine Learning supports clustering using several algorithms: k-Means, O-Cluster, and Expectation Maximization. In this notebook, we illustrate how to identify natural clusters of customers using the CUSTOMERS dataset from the SH schema using the unsupervised learning k-Means algorithm. The data exploration, preparation, and machine learning run inside Oracle Autonomous Database.","","See the documentation link below for details on the K-Means in-database algorithm.","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":{"startTime":1713901552440,"interpreter":"md.low","endTime":1713901552515,"results":[{"message":"<h2 id=\"identifying-customer-segments-using-k-means-clustering\">Identifying Customer Segments using K-Means Clustering<\/h2>\n<p>Oracle Machine Learning supports clustering using several algorithms: k-Means, O-Cluster, and Expectation Maximization. 
In this notebook, we illustrate how to identify natural clusters of customers using the CUSTOMERS dataset from the SH schema using the unsupervised learning k-Means algorithm. The data exploration, preparation, and machine learning run inside Oracle Autonomous Database.<\/p>\n<p>See the documentation link below for details on the K-Means in-database algorithm.<\/p>\n<p>Copyright (c) 2024 Oracle Corporation<\/p>\n<h6 id=\"the-universal-permissive-license-upl-version-10\"><a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a><\/h6>\n<hr />\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":9,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md",""],"enabled":true,"result":{"startTime":1713901552602,"interpreter":"md.low","endTime":1713901552680,"results":[{"message":"<p><img src=\"http://www.oracle.com/technetwork/database/options/advanced-analytics/clustering-5663171.jpg\" alt=\"tiny arrow\" title=\"tiny arrow\" /><\/p>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":3,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information...","message":["%md","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a>","* <a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle 
GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a href=\"https://www.oracle.com/goto/ml-clustering\" target=\"_blank\">OML Clustering<\/a>","* <a href=\"https://oracle.com/goto/ml-k-means\" target=\"_blank\">OML K-Means<\/a>"],"enabled":true,"result":{"startTime":1713901552755,"interpreter":"md.low","endTime":1713901552829,"results":[{"message":"<ul>\n<li><a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a><\/li>\n<li><a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a><\/li>\n<li><a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a><\/li>\n<li><a href=\"https://www.oracle.com/goto/ml-clustering\" target=\"_blank\">OML Clustering<\/a><\/li>\n<li><a href=\"https://oracle.com/goto/ml-k-means\" target=\"_blank\">OML K-Means<\/a><\/li>\n<\/ul>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Import libraries and set display options","message":["%python","","import pandas as pd","import numpy as np","import matplotlib.pyplot as plt","import oml","","pd.set_option('display.max_rows', 500)","pd.set_option('display.max_columns', 500)","pd.set_option('display.width', 
1000)"],"enabled":true,"result":{"startTime":1713901552904,"interpreter":"python.low","endTime":1713901554901,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Prepare data by merging CUSTOMERS with SUPPLEMENTARY_DEMOGRAPHICS","message":["%python","","CUSTOMERS = oml.sync(query = \"\"\"SELECT CUST_ID, CUST_GENDER, CUST_MARITAL_STATUS, CUST_YEAR_OF_BIRTH, CUST_INCOME_LEVEL, CUST_CREDIT_LIMIT "," FROM SH.CUSTOMERS\"\"\")","DEMO_DF = oml.sync(query = \"\"\"SELECT CUST_ID, EDUCATION, AFFINITY_CARD, HOUSEHOLD_SIZE, OCCUPATION, YRS_RESIDENCE, Y_BOX_GAMES"," FROM SH.SUPPLEMENTARY_DEMOGRAPHICS\"\"\")","CUST_DF = CUSTOMERS.merge(DEMO_DF, how = \"inner\", on = 'CUST_ID',suffixes = [\"\",\"\"])"],"enabled":true,"result":{"startTime":1713901554972,"interpreter":"python.low","endTime":1713901555123,"results":[{"message":"<stdin>:5: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display the first 5 rows of CUST_DF data","message":["%python","","z.show(CUST_DF.head())"],"enabled":true,"result":{"startTime":1713901555197,"interpreter":"python.low","endTime":1713901555349,"results":[{"message":"CUST_ID\tCUST_GENDER\tCUST_MARITAL_STATUS\tCUST_YEAR_OF_BIRTH\tCUST_INCOME_LEVEL\tCUST_CREDIT_LIMIT\tEDUCATION\tAFFINITY_CARD\tHOUSEHOLD_SIZE\tOCCUPATION\tYRS_RESIDENCE\tY_BOX_GAMES\n100134\tF\tDivorc.\t1965\tL: 300,000 and above\t9000\tAssoc-A\t0\t2\tCleric.\t2\t0\n102828\tF\tNeverM\t1967\tE: 90,000 - 109,999\t10000\tHS-grad\t0\t1\tMachine\t4\t0\n101232\tM\tNeverM\t1979\tJ: 190,000 - 249,999\t9000\t< Bach.\t0\t1\tOther\t2\t1\n100696\tM\tMarried\t1971\tF: 110,000 - 129,999\t7000\tProfsc\t1\t3\tProf.\t3\t0\n103948\tM\tNeverM\t1966\tJ: 190,000 - 249,999\t9000\t< Bach.\t0\t1\tCleric.\t4\t0\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","### Build a k-Means model using the CUST_DF proxy object"],"enabled":true,"result":{"startTime":1713901555422,"interpreter":"md.low","endTime":1713901555481,"results":[{"message":"<h3 id=\"build-a-k-means-model-using-the-cust_df-proxy-object\">Build a k-Means model using the CUST_DF proxy 
object<\/h3>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Build k-Means model with 3 clusters with default settings","message":["%python","","try:"," oml.drop(model=\"CUST_CLUSTER_MODEL\")","except:"," pass","","setting = {'KMNS_ITERATIONS': 20, 'KMNS_RANDOM_SEED': 7}","km_mod = oml.km(n_clusters = 3, **setting).fit(CUST_DF, model_name = \"CUST_CLUSTER_MODEL\", case_id = 'CUST_ID')"],"enabled":true,"result":{"startTime":1713901555552,"interpreter":"python.low","endTime":1713901559352,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Build k-Means model with 3 clusters with explicit settings","message":["%python","","try:"," oml.drop(model=\"CUST_CLUSTER_MODEL\")","except:"," pass","","setting = {'KMNS_ITERATIONS' : 20, "," 'KMNS_CONV_TOLERANCE' : .001,"," 'KMNS_DISTANCE' : 'KMNS_EUCLIDEAN',"," 'KMNS_MIN_PCT_ATTR_SUPPORT': .1,"," 'KMNS_NUM_BINS' : 11,"," 'KMNS_SPLIT_CRITERION' : 'KMNS_VARIANCE',"," 'KMNS_RANDOM_SEED' : 7,"," 'KMNS_DETAILS' : 'KMNS_DETAILS_ALL',"," 'ODMS_DETAILS' : 'ODMS_ENABLE',"," 'ODMS_SAMPLING' : 'ODMS_SAMPLING_DISABLE',"," 'PREP_AUTO' : 'ON'}","","km_mod = oml.km(n_clusters = 3, **setting)","km_mod = km_mod.fit(CUST_DF, "," model_name = \"CUST_CLUSTER_MODEL\", "," case_id = 
'CUST_ID')"],"enabled":true,"result":{"startTime":1713901559421,"interpreter":"python.low","endTime":1713901562417,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"raw\":{\"height\":300,\"lastColumns\":[],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"raw","title":"Display k-Means model details","message":["%python","","km_mod"],"enabled":true,"result":{"startTime":1713901562489,"interpreter":"python.low","endTime":1713901563134,"results":[{"message":"\nModel Name: CUST_CLUSTER_MODEL\n\nModel Owner: OMLUSER\n\nAlgorithm Name: K-Means\n\nMining Function: CLUSTERING\n\nSettings: \n setting name setting value\n0 ALGO_NAME ALGO_KMEANS\n1 CLUS_NUM_CLUSTERS 3\n2 KMNS_CONV_TOLERANCE 0.001\n3 KMNS_DETAILS KMNS_DETAILS_ALL\n4 KMNS_DISTANCE KMNS_EUCLIDEAN\n5 KMNS_ITERATIONS 20\n6 KMNS_MIN_PCT_ATTR_SUPPORT 0.1\n7 KMNS_NUM_BINS 11\n8 KMNS_RANDOM_SEED 7\n9 KMNS_SPLIT_CRITERION KMNS_VARIANCE\n10 ODMS_DETAILS ODMS_ENABLE\n11 ODMS_MISSING_VALUE_TREATMENT ODMS_MISSING_VALUE_AUTO\n12 ODMS_SAMPLING ODMS_SAMPLING_DISABLE\n13 PREP_AUTO ON\n\nGlobal Statistics: \n attribute name attribute value\n0 CONVERGED YES\n1 NUM_ROWS 4500\n\nAttributes: \nAFFINITY_CARD\nCUST_CREDIT_LIMIT\nCUST_GENDER\nCUST_INCOME_LEVEL\nCUST_MARITAL_STATUS\nCUST_YEAR_OF_BIRTH\nEDUCATION\nHOUSEHOLD_SIZE\nOCCUPATION\nYRS_RESIDENCE\nY_BOX_GAMES\n\nPartition: NO\n\nClusters: \n\n CLUSTER_ID ROW_CNT PARENT_CLUSTER_ID TREE_LEVEL DISPERSION\n0 1 4500 NaN 1 6.276772\n1 2 3123 1.0 2 6.586027\n2 3 1377 1.0 2 5.575388\n3 4 2085 2.0 3 6.938511\n4 5 1038 2.0 3 5.878003\n\nTaxonomy: \n\n PARENT_CLUSTER_ID CHILD_CLUSTER_ID\n0 1 2.0\n1 1 3.0\n2 2 4.0\n3 2 5.0\n4 3 NaN\n5 4 NaN\n6 5 NaN\n\nCentroids: \n\n CLUSTER_ID ATTRIBUTE_NAME MEAN MODE_VALUE VARIANCE\n0 1 AFFINITY_CARD 0.238222 None 
1.815127e-01\n1 1 CUST_CREDIT_LIMIT 7924.222222 None 1.591424e+07\n2 1 CUST_GENDER NaN M NaN\n3 1 CUST_INCOME_LEVEL NaN J: 190,000 - 249,999 NaN\n4 1 CUST_MARITAL_STATUS NaN Married NaN\n5 1 CUST_YEAR_OF_BIRTH 1964.624444 None 1.871268e+02\n6 1 EDUCATION NaN HS-grad NaN\n7 1 HOUSEHOLD_SIZE NaN 3 NaN\n8 1 OCCUPATION NaN Crafts NaN\n9 1 YRS_RESIDENCE 4.022000 None 3.617431e+00\n10 1 Y_BOX_GAMES 0.312444 None 2.148707e-01\n11 2 AFFINITY_CARD 0.332373 None 2.219722e-01\n12 2 CUST_CREDIT_LIMIT 7758.245277 None 1.591287e+07\n13 2 CUST_GENDER NaN M NaN\n14 2 CUST_INCOME_LEVEL NaN J: 190,000 - 249,999 NaN\n15 2 CUST_MARITAL_STATUS NaN Married NaN\n16 2 CUST_YEAR_OF_BIRTH 1958.073647 None 1.240862e+02\n17 2 EDUCATION NaN HS-grad NaN\n18 2 HOUSEHOLD_SIZE NaN 3 NaN\n19 2 OCCUPATION NaN Exec. NaN\n20 2 YRS_RESIDENCE 4.797951 None 2.846735e+00\n21 2 Y_BOX_GAMES 0.009606 None 9.516917e-03\n22 3 AFFINITY_CARD 0.024691 None 2.409920e-02\n23 3 CUST_CREDIT_LIMIT 8300.653595 None 1.572459e+07\n24 3 CUST_GENDER NaN M NaN\n25 3 CUST_INCOME_LEVEL NaN J: 190,000 - 249,999 NaN\n26 3 CUST_MARITAL_STATUS NaN NeverM NaN\n27 3 CUST_YEAR_OF_BIRTH 1979.481481 None 1.200711e+01\n28 3 EDUCATION NaN < Bach. 
NaN\n29 3 HOUSEHOLD_SIZE NaN 1 NaN\n30 3 OCCUPATION NaN Other NaN\n31 3 YRS_RESIDENCE 2.262164 None 9.028770e-01\n32 3 Y_BOX_GAMES 0.999274 None 7.262164e-04\n33 4 AFFINITY_CARD 0.000000 None 0.000000e+00\n34 4 CUST_CREDIT_LIMIT 7828.057554 None 1.567640e+07\n35 4 CUST_GENDER NaN M NaN\n36 4 CUST_INCOME_LEVEL NaN J: 190,000 - 249,999 NaN\n37 4 CUST_MARITAL_STATUS NaN Married NaN\n38 4 CUST_YEAR_OF_BIRTH 1958.042206 None 1.355078e+02\n39 4 EDUCATION NaN HS-grad NaN\n40 4 HOUSEHOLD_SIZE NaN 3 NaN\n41 4 OCCUPATION NaN Crafts NaN\n42 4 YRS_RESIDENCE 4.603357 None 2.747110e+00\n43 4 Y_BOX_GAMES 0.000000 None 0.000000e+00\n44 5 AFFINITY_CARD 1.000000 None 0.000000e+00\n45 5 CUST_CREDIT_LIMIT 7618.015414 None 1.637396e+07\n46 5 CUST_GENDER NaN M NaN\n47 5 CUST_INCOME_LEVEL NaN J: 190,000 - 249,999 NaN\n48 5 CUST_MARITAL_STATUS NaN Married NaN\n49 5 CUST_YEAR_OF_BIRTH 1958.136802 None 1.012465e+02\n50 5 EDUCATION NaN Bach. NaN\n51 5 HOUSEHOLD_SIZE NaN 3 NaN\n52 5 OCCUPATION NaN Exec. NaN\n53 5 YRS_RESIDENCE 5.188825 None 2.820627e+00\n54 5 Y_BOX_GAMES 0.028902 None 2.809349e-02\n\nLeaf Cluster Counts: \n\n CLUSTER_ID CNT\n0 3 1377\n1 4 2085\n2 5 1038\n\nCluster Hists: \n\n cluster.id variable bin.id lower.bound upper.bound label count\n0 1 AFFINITY_CARD 1 0.0 0.1 0:.1 3428\n1 1 AFFINITY_CARD 2 0.1 0.2 .1:.2 0\n2 1 AFFINITY_CARD 3 0.2 0.3 .2:.3 0\n3 1 AFFINITY_CARD 4 0.3 0.4 .3:.4 0\n4 1 AFFINITY_CARD 5 0.4 0.5 .4:.5 0\n.. ... ... ... ... ... ... 
...\n535 5 Y_BOX_GAMES 6 0.5 0.6 .5:.6 0\n536 5 Y_BOX_GAMES 7 0.6 0.7 .6:.7 0\n537 5 Y_BOX_GAMES 8 0.7 0.8 .7:.8 0\n538 5 Y_BOX_GAMES 9 0.8 0.9 .8:.9 0\n539 5 Y_BOX_GAMES 10 0.9 1.0 .9:1 30\n\n[540 rows x 7 columns]\n\nRules: \n\n cluster.id rhs.support rhs.conf lhr.support lhs.conf lhs.var lhs.var.support lhs.var.conf predicate\n0 1 4500 1.000000 3840 0.853333 EDUCATION 3840 0.625000 EDUCATION IN < Bach.\n1 1 4500 1.000000 3840 0.853333 EDUCATION 3840 0.625000 EDUCATION IN Assoc-A\n2 1 4500 1.000000 3840 0.853333 EDUCATION 3840 0.625000 EDUCATION IN Assoc-V\n3 1 4500 1.000000 3840 0.853333 EDUCATION 3840 0.625000 EDUCATION IN Bach.\n4 1 4500 1.000000 3840 0.853333 EDUCATION 3840 0.625000 EDUCATION IN HS-grad\n5 1 4500 1.000000 3840 0.853333 EDUCATION 3840 0.625000 EDUCATION IN Masters\n6 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN B: 30,000 - 49,999\n7 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN E: 90,000 - 109,999\n8 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN F: 110,000 - 129,999\n9 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN G: 130,000 - 149,999\n10 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN H: 150,000 - 169,999\n11 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN I: 170,000 - 189,999\n12 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN J: 190,000 - 249,999\n13 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN K: 250,000 - 299,999\n14 1 4500 1.000000 4108 0.853333 CUST_INCOME_LEVEL 3840 0.250000 CUST_INCOME_LEVEL IN L: 300,000 and above\n15 1 4500 1.000000 4133 0.853333 HOUSEHOLD_SIZE 3840 0.333333 HOUSEHOLD_SIZE IN 1\n16 1 4500 1.000000 4133 0.853333 HOUSEHOLD_SIZE 3840 0.333333 HOUSEHOLD_SIZE IN 2\n17 1 4500 1.000000 4133 0.853333 HOUSEHOLD_SIZE 3840 0.333333 
HOUSEHOLD_SIZE IN 3\n18 1 4500 1.000000 4133 0.853333 HOUSEHOLD_SIZE 3840 0.333333 HOUSEHOLD_SIZE IN 9+\n19 1 4500 1.000000 4152 0.853333 CUST_MARITAL_STATUS 3840 0.571429 CUST_MARITAL_STATUS IN Divorc.\n20 1 4500 1.000000 4152 0.853333 CUST_MARITAL_STATUS 3840 0.571429 CUST_MARITAL_STATUS IN Married\n21 1 4500 1.000000 4152 0.853333 CUST_MARITAL_STATUS 3840 0.571429 CUST_MARITAL_STATUS IN NeverM\n22 1 4500 1.000000 4190 0.853333 CUST_YEAR_OF_BIRTH 3840 0.400000 CUST_YEAR_OF_BIRTH <= 1986\n23 1 4500 1.000000 4190 0.853333 CUST_YEAR_OF_BIRTH 3840 0.400000 CUST_YEAR_OF_BIRTH > 1942.2\n24 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN ?\n25 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Cleric.\n26 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Crafts\n27 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Exec.\n28 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Handler\n29 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Machine\n30 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Other\n31 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Prof.\n32 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Sales\n33 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN TechSup\n34 1 4500 1.000000 4268 0.853333 OCCUPATION 3840 0.266667 OCCUPATION IN Transp.\n35 1 4500 1.000000 4409 0.853333 YRS_RESIDENCE 3840 0.400000 YRS_RESIDENCE <= 8.4\n36 1 4500 1.000000 4409 0.853333 YRS_RESIDENCE 3840 0.400000 YRS_RESIDENCE >= 0\n37 1 4500 1.000000 4500 0.853333 AFFINITY_CARD 3840 0.000000 AFFINITY_CARD <= 1\n38 1 4500 1.000000 4500 0.853333 AFFINITY_CARD 3840 0.000000 AFFINITY_CARD >= 0\n39 1 4500 1.000000 4500 0.853333 CUST_CREDIT_LIMIT 3840 0.000000 CUST_CREDIT_LIMIT <= 15000\n40 1 4500 1.000000 4500 0.853333 CUST_CREDIT_LIMIT 3840 0.000000 CUST_CREDIT_LIMIT >= 1500\n41 1 4500 1.000000 
4500 0.853333 CUST_GENDER 3840 0.000000 CUST_GENDER IN F\n42 1 4500 1.000000 4500 0.853333 CUST_GENDER 3840 0.000000 CUST_GENDER IN M\n43 1 4500 1.000000 4500 0.853333 Y_BOX_GAMES 3840 0.000000 Y_BOX_GAMES <= 1\n44 1 4500 1.000000 4500 0.853333 Y_BOX_GAMES 3840 0.000000 Y_BOX_GAMES >= 0\n45 2 3123 0.694000 2682 0.858790 EDUCATION 2682 0.001083 EDUCATION IN < Bach.\n46 2 3123 0.694000 2682 0.858790 EDUCATION 2682 0.001083 EDUCATION IN Assoc-A\n47 2 3123 0.694000 2682 0.858790 EDUCATION 2682 0.001083 EDUCATION IN Assoc-V\n48 2 3123 0.694000 2682 0.858790 EDUCATION 2682 0.001083 EDUCATION IN Bach.\n49 2 3123 0.694000 2682 0.858790 EDUCATION 2682 0.001083 EDUCATION IN HS-grad\n50 2 3123 0.694000 2682 0.858790 EDUCATION 2682 0.001083 EDUCATION IN Masters\n51 2 3123 0.694000 2772 0.858790 HOUSEHOLD_SIZE 2682 0.014310 HOUSEHOLD_SIZE IN 2\n52 2 3123 0.694000 2772 0.858790 HOUSEHOLD_SIZE 2682 0.014310 HOUSEHOLD_SIZE IN 3\n53 2 3123 0.694000 2772 0.858790 HOUSEHOLD_SIZE 2682 0.014310 HOUSEHOLD_SIZE IN 9+\n54 2 3123 0.694000 2812 0.858790 CUST_MARITAL_STATUS 2682 0.012524 CUST_MARITAL_STATUS IN Divorc.\n55 2 3123 0.694000 2812 0.858790 CUST_MARITAL_STATUS 2682 0.012524 CUST_MARITAL_STATUS IN Married\n56 2 3123 0.694000 2812 0.858790 CUST_MARITAL_STATUS 2682 0.012524 CUST_MARITAL_STATUS IN NeverM\n57 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN B: 30,000 - 49,999\n58 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN E: 90,000 - 109,999\n59 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN F: 110,000 - 129,999\n60 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN G: 130,000 - 149,999\n61 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN H: 150,000 - 169,999\n62 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN I: 170,000 - 189,999\n63 2 3123 0.694000 2836 0.858790 
CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN J: 190,000 - 249,999\n64 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN K: 250,000 - 299,999\n65 2 3123 0.694000 2836 0.858790 CUST_INCOME_LEVEL 2682 0.000437 CUST_INCOME_LEVEL IN L: 300,000 and above\n66 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN ?\n67 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Cleric.\n68 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Crafts\n69 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Exec.\n70 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Machine\n71 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Other\n72 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Prof.\n73 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Sales\n74 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN TechSup\n75 2 3123 0.694000 2868 0.858790 OCCUPATION 2682 0.001381 OCCUPATION IN Transp.\n76 2 3123 0.694000 2882 0.858790 YRS_RESIDENCE 2682 0.015238 YRS_RESIDENCE <= 8.4\n77 2 3123 0.694000 2882 0.858790 YRS_RESIDENCE 2682 0.015238 YRS_RESIDENCE > 2.8\n78 2 3123 0.694000 3005 0.858790 CUST_YEAR_OF_BIRTH 2682 0.037672 CUST_YEAR_OF_BIRTH <= 1978.7\n79 2 3123 0.694000 3005 0.858790 CUST_YEAR_OF_BIRTH 2682 0.037672 CUST_YEAR_OF_BIRTH > 1934.9\n80 2 3123 0.694000 3093 0.858790 Y_BOX_GAMES 2682 0.045059 Y_BOX_GAMES <= .1\n81 2 3123 0.694000 3093 0.858790 Y_BOX_GAMES 2682 0.045059 Y_BOX_GAMES >= 0\n82 2 3123 0.694000 3123 0.858790 AFFINITY_CARD 2682 0.002368 AFFINITY_CARD <= 1\n83 2 3123 0.694000 3123 0.858790 AFFINITY_CARD 2682 0.002368 AFFINITY_CARD >= 0\n84 2 3123 0.694000 3123 0.858790 CUST_CREDIT_LIMIT 2682 0.000147 CUST_CREDIT_LIMIT <= 15000\n85 2 3123 0.694000 3123 0.858790 CUST_CREDIT_LIMIT 2682 0.000147 CUST_CREDIT_LIMIT >= 1500\n86 2 3123 0.694000 3123 0.858790 CUST_GENDER 2682 
0.001089 CUST_GENDER IN F\n87 2 3123 0.694000 3123 0.858790 CUST_GENDER 2682 0.001089 CUST_GENDER IN M\n88 3 1377 0.306000 1153 0.837328 HOUSEHOLD_SIZE 1153 0.041464 HOUSEHOLD_SIZE IN 1\n89 3 1377 0.306000 1153 0.837328 HOUSEHOLD_SIZE 1153 0.041464 HOUSEHOLD_SIZE IN 2\n90 3 1377 0.306000 1153 0.837328 HOUSEHOLD_SIZE 1153 0.041464 HOUSEHOLD_SIZE IN 3\n91 3 1377 0.306000 1231 0.837328 EDUCATION 1153 0.006582 EDUCATION IN 10th\n92 3 1377 0.306000 1231 0.837328 EDUCATION 1153 0.006582 EDUCATION IN 11th\n93 3 1377 0.306000 1231 0.837328 EDUCATION 1153 0.006582 EDUCATION IN < Bach.\n94 3 1377 0.306000 1231 0.837328 EDUCATION 1153 0.006582 EDUCATION IN Assoc-V\n95 3 1377 0.306000 1231 0.837328 EDUCATION 1153 0.006582 EDUCATION IN Bach.\n96 3 1377 0.306000 1231 0.837328 EDUCATION 1153 0.006582 EDUCATION IN HS-grad\n97 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN B: 30,000 - 49,999\n98 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN E: 90,000 - 109,999\n99 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN F: 110,000 - 129,999\n100 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN G: 130,000 - 149,999\n101 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN H: 150,000 - 169,999\n102 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN I: 170,000 - 189,999\n103 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN J: 190,000 - 249,999\n104 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN K: 250,000 - 299,999\n105 3 1377 0.306000 1272 0.837328 CUST_INCOME_LEVEL 1153 0.002354 CUST_INCOME_LEVEL IN L: 300,000 and above\n106 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN ?\n107 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN Cleric.\n108 3 1377 0.306000 1277 0.837328 OCCUPATION 
1153 0.007270 OCCUPATION IN Crafts\n109 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN Exec.\n110 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN Handler\n111 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN Machine\n112 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN Other\n113 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN Prof.\n114 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN Sales\n115 3 1377 0.306000 1277 0.837328 OCCUPATION 1153 0.007270 OCCUPATION IN TechSup\n116 3 1377 0.306000 1279 0.837328 CUST_MARITAL_STATUS 1153 0.050995 CUST_MARITAL_STATUS IN Married\n117 3 1377 0.306000 1279 0.837328 CUST_MARITAL_STATUS 1153 0.050995 CUST_MARITAL_STATUS IN NeverM\n118 3 1377 0.306000 1343 0.837328 AFFINITY_CARD 1153 0.024646 AFFINITY_CARD <= .1\n119 3 1377 0.306000 1343 0.837328 AFFINITY_CARD 1153 0.024646 AFFINITY_CARD >= 0\n120 3 1377 0.306000 1359 0.837328 YRS_RESIDENCE 1153 0.065320 YRS_RESIDENCE <= 4.2\n121 3 1377 0.306000 1359 0.837328 YRS_RESIDENCE 1153 0.065320 YRS_RESIDENCE >= 0\n122 3 1377 0.306000 1376 0.837328 Y_BOX_GAMES 1153 0.143419 Y_BOX_GAMES <= 1\n123 3 1377 0.306000 1376 0.837328 Y_BOX_GAMES 1153 0.143419 Y_BOX_GAMES > .9\n124 3 1377 0.306000 1377 0.837328 CUST_CREDIT_LIMIT 1153 0.000766 CUST_CREDIT_LIMIT <= 15000\n125 3 1377 0.306000 1377 0.837328 CUST_CREDIT_LIMIT 1153 0.000766 CUST_CREDIT_LIMIT >= 1500\n126 3 1377 0.306000 1377 0.837328 CUST_GENDER 1153 0.005175 CUST_GENDER IN F\n127 3 1377 0.306000 1377 0.837328 CUST_GENDER 1153 0.005175 CUST_GENDER IN M\n128 3 1377 0.306000 1377 0.837328 CUST_YEAR_OF_BIRTH 1153 0.133039 CUST_YEAR_OF_BIRTH <= 1986\n129 3 1377 0.306000 1377 0.837328 CUST_YEAR_OF_BIRTH 1153 0.133039 CUST_YEAR_OF_BIRTH > 1971.4\n130 4 2085 0.463333 1802 0.864269 CUST_MARITAL_STATUS 1802 0.010504 CUST_MARITAL_STATUS IN Divorc.\n131 4 2085 0.463333 1802 0.864269 CUST_MARITAL_STATUS 1802 0.010504 
CUST_MARITAL_STATUS IN Married\n132 4 2085 0.463333 1802 0.864269 CUST_MARITAL_STATUS 1802 0.010504 CUST_MARITAL_STATUS IN NeverM\n133 4 2085 0.463333 1842 0.864269 EDUCATION 1802 0.002405 EDUCATION IN 7th-8th\n134 4 2085 0.463333 1842 0.864269 EDUCATION 1802 0.002405 EDUCATION IN < Bach.\n135 4 2085 0.463333 1842 0.864269 EDUCATION 1802 0.002405 EDUCATION IN Assoc-A\n136 4 2085 0.463333 1842 0.864269 EDUCATION 1802 0.002405 EDUCATION IN Assoc-V\n137 4 2085 0.463333 1842 0.864269 EDUCATION 1802 0.002405 EDUCATION IN Bach.\n138 4 2085 0.463333 1842 0.864269 EDUCATION 1802 0.002405 EDUCATION IN HS-grad\n139 4 2085 0.463333 1842 0.864269 EDUCATION 1802 0.002405 EDUCATION IN Masters\n140 4 2085 0.463333 1844 0.864269 HOUSEHOLD_SIZE 1802 0.011541 HOUSEHOLD_SIZE IN 2\n141 4 2085 0.463333 1844 0.864269 HOUSEHOLD_SIZE 1802 0.011541 HOUSEHOLD_SIZE IN 3\n142 4 2085 0.463333 1844 0.864269 HOUSEHOLD_SIZE 1802 0.011541 HOUSEHOLD_SIZE IN 9+\n143 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN B: 30,000 - 49,999\n144 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN E: 90,000 - 109,999\n145 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN F: 110,000 - 129,999\n146 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN G: 130,000 - 149,999\n147 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN H: 150,000 - 169,999\n148 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN I: 170,000 - 189,999\n149 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN J: 190,000 - 249,999\n150 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN K: 250,000 - 299,999\n151 4 2085 0.463333 1887 0.864269 CUST_INCOME_LEVEL 1802 0.000278 CUST_INCOME_LEVEL IN L: 300,000 and above\n152 4 2085 0.463333 1909 0.864269 YRS_RESIDENCE 1802 0.011783 YRS_RESIDENCE <= 
8.4\n153 4 2085 0.463333 1909 0.864269 YRS_RESIDENCE 1802 0.011783 YRS_RESIDENCE > 2.8\n154 4 2085 0.463333 1989 0.864269 CUST_YEAR_OF_BIRTH 1802 0.038027 CUST_YEAR_OF_BIRTH <= 1978.7\n155 4 2085 0.463333 1989 0.864269 CUST_YEAR_OF_BIRTH 1802 0.038027 CUST_YEAR_OF_BIRTH > 1934.9\n156 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN ?\n157 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Cleric.\n158 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Crafts\n159 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Exec.\n160 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Farming\n161 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Handler\n162 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Machine\n163 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Other\n164 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Prof.\n165 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Sales\n166 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN TechSup\n167 4 2085 0.463333 2031 0.864269 OCCUPATION 1802 0.001454 OCCUPATION IN Transp.\n168 4 2085 0.463333 2085 0.864269 AFFINITY_CARD 1802 0.039364 AFFINITY_CARD <= .1\n169 4 2085 0.463333 2085 0.864269 AFFINITY_CARD 1802 0.039364 AFFINITY_CARD >= 0\n170 4 2085 0.463333 2085 0.864269 CUST_CREDIT_LIMIT 1802 0.000065 CUST_CREDIT_LIMIT <= 15000\n171 4 2085 0.463333 2085 0.864269 CUST_CREDIT_LIMIT 1802 0.000065 CUST_CREDIT_LIMIT >= 1500\n172 4 2085 0.463333 2085 0.864269 CUST_GENDER 1802 0.001412 CUST_GENDER IN F\n173 4 2085 0.463333 2085 0.864269 CUST_GENDER 1802 0.001412 CUST_GENDER IN M\n174 4 2085 0.463333 2085 0.864269 Y_BOX_GAMES 1802 0.053345 Y_BOX_GAMES <= .1\n175 4 2085 0.463333 2085 0.864269 Y_BOX_GAMES 1802 0.053345 Y_BOX_GAMES >= 0\n176 5 1038 0.230667 891 0.858382 CUST_GENDER 891 0.038201 CUST_GENDER = M\n177 5 
1038 0.230667 909 0.858382 CUST_MARITAL_STATUS 891 0.057819 CUST_MARITAL_STATUS = Married\n178 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN Cleric.\n179 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN Crafts\n180 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN Exec.\n181 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN Machine\n182 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN Prof.\n183 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN Sales\n184 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN TechSup\n185 5 1038 0.230667 946 0.858382 OCCUPATION 891 0.021280 OCCUPATION IN Transp.\n186 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN B: 30,000 - 49,999\n187 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN E: 90,000 - 109,999\n188 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN F: 110,000 - 129,999\n189 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN G: 130,000 - 149,999\n190 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN H: 150,000 - 169,999\n191 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN I: 170,000 - 189,999\n192 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN J: 190,000 - 249,999\n193 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN K: 250,000 - 299,999\n194 5 1038 0.230667 949 0.858382 CUST_INCOME_LEVEL 891 0.001487 CUST_INCOME_LEVEL IN L: 300,000 and above\n195 5 1038 0.230667 973 0.858382 YRS_RESIDENCE 891 0.028267 YRS_RESIDENCE <= 8.4\n196 5 1038 0.230667 973 0.858382 YRS_RESIDENCE 891 0.028267 YRS_RESIDENCE > 2.8\n197 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN < Bach.\n198 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN 
Assoc-A\n199 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN Assoc-V\n200 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN Bach.\n201 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN HS-grad\n202 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN Masters\n203 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN PhD\n204 5 1038 0.230667 1003 0.858382 EDUCATION 891 0.015113 EDUCATION IN Profsc\n205 5 1038 0.230667 1003 0.858382 HOUSEHOLD_SIZE 891 0.066799 HOUSEHOLD_SIZE IN 2\n206 5 1038 0.230667 1003 0.858382 HOUSEHOLD_SIZE 891 0.066799 HOUSEHOLD_SIZE IN 3\n207 5 1038 0.230667 1003 0.858382 HOUSEHOLD_SIZE 891 0.066799 HOUSEHOLD_SIZE IN 4-5\n208 5 1038 0.230667 1008 0.858382 Y_BOX_GAMES 891 0.035169 Y_BOX_GAMES <= .1\n209 5 1038 0.230667 1008 0.858382 Y_BOX_GAMES 891 0.035169 Y_BOX_GAMES >= 0\n210 5 1038 0.230667 1016 0.858382 CUST_YEAR_OF_BIRTH 891 0.039020 CUST_YEAR_OF_BIRTH <= 1978.7\n211 5 1038 0.230667 1016 0.858382 CUST_YEAR_OF_BIRTH 891 0.039020 CUST_YEAR_OF_BIRTH > 1934.9\n212 5 1038 0.230667 1038 0.858382 AFFINITY_CARD 891 0.169369 AFFINITY_CARD <= 1\n213 5 1038 0.230667 1038 0.858382 AFFINITY_CARD 891 0.169369 AFFINITY_CARD > .9\n214 5 1038 0.230667 1038 0.858382 CUST_CREDIT_LIMIT 891 0.000585 CUST_CREDIT_LIMIT <= 15000\n215 5 1038 0.230667 1038 0.858382 CUST_CREDIT_LIMIT 891 0.000585 CUST_CREDIT_LIMIT >= 1500\n\n\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Display model settings","message":["%python","","km_mod.settings"],"enabled":true,"result":{"startTime":1713901563226,"interpreter":"python.low","endTime":1713901563302,"results":[{"message":"{'kmns_iterations': 20, 
'kmns_conv_tolerance': 0.001, 'kmns_distance': 'KMNS_EUCLIDEAN', 'kmns_min_pct_attr_support': 0.1, 'kmns_num_bins': 11, 'kmns_split_criterion': 'KMNS_VARIANCE', 'kmns_random_seed': 7, 'kmns_details': 'KMNS_DETAILS_ALL', 'odms_details': 'ODMS_ENABLE', 'odms_sampling': 'ODMS_SAMPLING_DISABLE', 'prep_auto': 'ON', 'clus_num_clusters': 3}\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display cluster details for all clusters in the hierarchy with row counts and dispersion","message":["%python","","z.show(km_mod.clusters)"],"enabled":true,"result":{"startTime":1713901563372,"interpreter":"python.low","endTime":1713901563465,"results":[{"message":"CLUSTER_ID\tROW_CNT\tPARENT_CLUSTER_ID\tTREE_LEVEL\tDISPERSION\n1.0\t4500.0\tnan\t1.0\t6.276771678127229\n2.0\t3123.0\t1.0\t2.0\t6.5860271201254195\n3.0\t1377.0\t1.0\t2.0\t5.575388420784927\n4.0\t2085.0\t2.0\t3.0\t6.9385112173435965\n5.0\t1038.0\t2.0\t3.0\t5.878002705192953\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Define function to format the cluster centroid rule","message":["%python","","def display_rule(model, cluster_id, dat):"," rules = model.rules"," cluster_rules = rules[rules['cluster.id'] == cluster_id]"," vars = cluster_rules['lhs.var'].drop_duplicates().pull()"," res = []",""," for var in vars:"," rule = []"," var_rules = cluster_rules[cluster_rules['lhs.var'] == var, 'predicate'].pull()"," if isinstance(dat[var], 
oml.String):"," for r in var_rules:"," v = r.split(\" IN \")[1]"," if len(v) > 0:"," rule.append(v)"," rule = var + \" IN {\" + ','.join(rule) + \"}\""," else:"," rule = \" and \".join(var_rules)"," res.append(rule + \"\\n\")",""," print(\"if \" + \" and \".join(res) + \"then cluster \" + str(cluster_id))"],"enabled":true,"result":{"startTime":1713901606100,"interpreter":"python.low","endTime":1713901606162,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":0,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Display formatted rule for cluster 3","message":["%python","","display_rule(model = km_mod, "," cluster_id = 3, "," dat = CUST_DF)"],"enabled":true,"result":{"startTime":1713901609220,"interpreter":"python.low","endTime":1713901610031,"results":[{"message":"if CUST_CREDIT_LIMIT <= 15000 and CUST_CREDIT_LIMIT >= 1500\n and CUST_INCOME_LEVEL IN {B: 30,000 - 49,999,E: 90,000 - 109,999,F: 110,000 - 129,999,G: 130,000 - 149,999,H: 150,000 - 169,999,I: 170,000 - 189,999,J: 190,000 - 249,999,K: 250,000 - 299,999,L: 300,000 and above}\n and HOUSEHOLD_SIZE IN {1,2,3}\n and OCCUPATION IN {?,Cleric.,Crafts,Exec.,Handler,Machine,Other,Prof.,Sales,TechSup}\n and YRS_RESIDENCE <= 4.2 and YRS_RESIDENCE >= 0\n and CUST_YEAR_OF_BIRTH <= 1986 and CUST_YEAR_OF_BIRTH > 1971.4\n and AFFINITY_CARD <= .1 and AFFINITY_CARD >= 0\n and CUST_MARITAL_STATUS IN {Married,NeverM}\n and Y_BOX_GAMES <= 1 and Y_BOX_GAMES > .9\n and EDUCATION IN {10th,11th,< Bach.,Assoc-V,Bach.,HS-grad}\n and CUST_GENDER IN {F,M}\nthen cluster 
3\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":0,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Apply clustering model to assign rows to clusters","message":["%python","","pred = km_mod.predict(CUST_DF, supplemental_cols = CUST_DF)"],"enabled":true,"result":{"startTime":1713901619500,"interpreter":"python.low","endTime":1713901619619,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":5,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Use matplotlib to view cluster results using birth year and years residence","message":["%python","","pred_df = pred[['CUST_ID', 'CLUSTER_ID', 'CUST_YEAR_OF_BIRTH', 'YRS_RESIDENCE', 'CUST_CREDIT_LIMIT']].pull()","","fig = plt.figure()","ax=fig.add_axes([0.1,0.1,0.8,0.8])","","ids = pred_df['CLUSTER_ID']","clusters = ids.drop_duplicates().values","handles = []","labs = []","colors = ['r', 'b', 'g']","for i, c in enumerate(clusters):"," xc = pred_df[pred_df['CLUSTER_ID'] == c]['YRS_RESIDENCE'].values"," yc = pred_df[pred_df['CLUSTER_ID'] == c]['CUST_YEAR_OF_BIRTH'].values"," "," h = ax.scatter(xc, yc, color= colors[i])"," handles.append(h)"," labs.append('CLUSTER' + str(c))","ax.legend(handles, labs)","plt.title('K-Means Clustering')","","plt.grid(True)","","plt.xlabel('YRS_RESIDENCE')","plt.ylabel('CUST_YEAR_OF_BIRTH')","plt.show()"],"enabled":true,"result":{"startTime":1713901619696,"interpreter":"python.low","endTime":1713901619964,"results":[{"message":"<div style='width:auto;height:auto'><img src= 
style='width=auto;height:auto'><div>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":7,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","### Compare clusters","","Compare the feature distribution for year of birth in clusters 4 and 5. Cluster 4 tends to have middle-aged individuals, while cluster 5 has more senior individuals. "],"enabled":true,"result":{"startTime":1713901620056,"interpreter":"md.low","endTime":1713901620124,"results":[{"message":"<h3 id=\"compare-clusters\">Compare clusters<\/h3>\n<p>Compare the feature distribution for year of birth in clusters 4 and 5. Cluster 4 tends to have middle-aged individuals, while cluster 5 has more senior individuals.<\/p>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"table\":{\"version\":1},\"bar\":{\"showSeries\":[\"count\"],\"series\":{\"availableSeriesElements\":[{\"id\":\"count\",\"lineType\":\"straight\",\"borderColor\":\"rgb(25, 95, 116)\",\"borderWidth\":0,\"color\":\"rgb(25, 95, 116)\",\"pattern\":\"auto\",\"markerColor\":\"rgb(25, 95, 116)\",\"markerDisplayed\":\"auto\",\"markerShape\":\"auto\",\"markerSize\":0}]},\"axis\":{\"y\":{\"max\":100}},\"lastColumns\":[\"CUST_YEAR_OF_BIRTH\",\"count\"],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"bar","title":"Distribution of the year of birth for cluster 4","message":["%python","","feature = 'CUST_YEAR_OF_BIRTH' ","","DF = pred[pred['CLUSTER_ID'] == 
4].crosstab([feature])","z.show(DF.sort_values(feature))"],"enabled":true,"result":{"startTime":1713901620192,"interpreter":"python.low","endTime":1713901620512,"results":[{"message":"CUST_YEAR_OF_BIRTH\tcount\n1913\t5\n1921\t1\n1922\t6\n1923\t4\n1924\t2\n1925\t6\n1926\t10\n1927\t3\n1928\t14\n1929\t6\n1930\t4\n1931\t6\n1932\t9\n1933\t8\n1934\t12\n1935\t13\n1936\t12\n1937\t10\n1938\t17\n1939\t22\n1940\t22\n1941\t24\n1942\t16\n1943\t32\n1944\t31\n1945\t32\n1946\t33\n1947\t35\n1948\t34\n1949\t34\n1950\t38\n1951\t25\n1952\t49\n1953\t20\n1954\t61\n1955\t50\n1956\t63\n1957\t71\n1958\t66\n1959\t72\n1960\t50\n1961\t75\n1962\t81\n1963\t70\n1964\t83\n1965\t59\n1966\t86\n1967\t92\n1968\t93\n1969\t95\n1970\t89\n1971\t93\n1972\t78\n1973\t63\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"table\":{\"version\":1},\"bar\":{\"showSeries\":[\"count\"],\"series\":{\"availableSeriesElements\":[{\"id\":\"count\",\"lineType\":\"straight\",\"borderColor\":\"rgb(25, 95, 116)\",\"borderWidth\":0,\"color\":\"rgb(25, 95, 116)\",\"pattern\":\"auto\",\"markerColor\":\"rgb(25, 95, 116)\",\"markerDisplayed\":\"auto\",\"markerShape\":\"auto\",\"markerSize\":0}]},\"axis\":{\"y\":{\"max\":100}},\"lastColumns\":[\"CUST_YEAR_OF_BIRTH\",\"count\"],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"bar","title":"Distribution of the year of birth for cluster 5","message":["%python","","feature = 'CUST_YEAR_OF_BIRTH' ","","DF = pred[pred['CLUSTER_ID'] == 
5].crosstab([feature])","z.show(DF.sort_values(feature))"],"enabled":true,"result":{"startTime":1713901620589,"interpreter":"python.low","endTime":1713901620859,"results":[{"message":"CUST_YEAR_OF_BIRTH\tcount\n1924\t4\n1925\t2\n1926\t2\n1929\t1\n1931\t1\n1932\t4\n1933\t2\n1934\t6\n1935\t5\n1936\t10\n1937\t2\n1938\t9\n1939\t3\n1940\t8\n1941\t7\n1942\t12\n1943\t11\n1944\t21\n1945\t17\n1946\t12\n1947\t10\n1948\t18\n1949\t17\n1950\t22\n1951\t26\n1952\t34\n1953\t36\n1954\t32\n1955\t44\n1956\t33\n1957\t48\n1958\t39\n1959\t47\n1960\t50\n1961\t40\n1962\t32\n1963\t32\n1964\t22\n1965\t44\n1966\t32\n1967\t31\n1968\t44\n1969\t40\n1970\t27\n1971\t28\n1972\t12\n1973\t29\n1974\t10\n1975\t13\n1976\t2\n1977\t4\n1978\t1\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","### Using SQL access prediction details","One feature of OML is the ability to use models from SQL as well as Python. While OML4Py v1.0 does not expose prediction details through a Python API, such details are available through the SQL API, which is illustrated here.","","We first create the table CUSTOMERS360 from the Python proxy object CUST_DF so that SQL can reference it. This could just as easily have been a database view. ","","Then, since prediction details are provided as an XML string, we extract the elements to present them as table columns. The 'first attribute' identifies the feature and value that most influences the cluster assignment with corresponding weight. This is followed by the second and third attribute. In this example, we provide three, but additional attributes could be presented as well. 
"],"enabled":true,"result":{"startTime":1713901620932,"interpreter":"md.low","endTime":1713901620991,"results":[{"message":"<h3 id=\"using-sql-access-prediction-details\">Using SQL access prediction details<\/h3>\n<p>One feature of OML is the ability to use models from SQL as well as Python. While OML4Py v1.0 does not expose prediction details through a Python API, such details are available through the SQL API, which is illustrated here.<\/p>\n<p>We first create the table CUSTOMERS360 from the Python proxy object CUST_DF so that SQL can reference it. This could just as easily have been a database view.<\/p>\n<p>Then, since prediction details are provided as an XML string, we extract the elements to present them as table columns. The 'first attribute' identifies the feature and value that most influences the cluster assignment with corresponding weight. This is followed by the second and third attribute. In this example, we provide three, but additional attributes could be presented as well.<\/p>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Create table for use in SQL query","message":["%python","","try:"," oml.drop(table = 'CUSTOMERS360')","except:"," pass","_ = CUST_DF.materialize(table = 'CUSTOMERS360')"],"enabled":true,"result":{"startTime":1713901621210,"interpreter":"python.low","endTime":1713901621859,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"List 3 most relevant attributes for 
specific customers and likely cluster assignments","message":["%sql","","SELECT CUST_ID,"," CLUSTER_ID,"," ROUND(PROB*100,0) PROB_PCT,"," RTRIM(TRIM(SUBSTR(OUTPRED.\"Attribute1\",17,100)),'rank=\"1\"/>') FIRST_ATTRIBUTE,"," RTRIM(TRIM(SUBSTR(OUTPRED.\"Attribute2\",17,100)),'rank=\"2\"/>') SECOND_ATTRIBUTE,"," RTRIM(TRIM(SUBSTR(OUTPRED.\"Attribute3\",17,100)),'rank=\"3\"/>') THIRD_ATTRIBUTE","FROM (SELECT CUST_ID, S.CLUSTER_ID, PROBABILITY PROB, "," CLUSTER_DETAILS(CUST_CLUSTER_MODEL USING T.*) DETAIL"," FROM (SELECT V.*, CLUSTER_SET(CUST_CLUSTER_MODEL, NULL, 0.2 USING *) PSET"," FROM CUSTOMERS360 V"," WHERE cust_id = ${CUST_ID ='101362','101362'|'102087'| '100456'}) T,"," TABLE(T.PSET) S"," ORDER BY 2 DESC) OUT,"," XMLTABLE('/Details'"," PASSING OUT.DETAIL"," COLUMNS "," \"Attribute1\" XMLType PATH 'Attribute[1]',"," \"Attribute2\" XMLType PATH 'Attribute[2]',"," \"Attribute3\" XMLType PATH 'Attribute[3]') OUTPRED"],"enabled":true,"result":{"startTime":1713901621928,"interpreter":"sql.low","endTime":1713901623724,"results":[{"message":"CUST_ID\tCLUSTER_ID\tPROB_PCT\tFIRST_ATTRIBUTE\tSECOND_ATTRIBUTE\tTHIRD_ATTRIBUTE\n100456\t5\t29\t\"AFFINITY_CARD\" actualValue=\"0\" weight=\".032\" \t\"Y_BOX_GAMES\" actualValue=\"0\" weight=\".027\" \t\"YRS_RESIDENCE\" actualValue=\"6\" weight=\".017\" \n100456\t4\t50\t\"AFFINITY_CARD\" actualValue=\"0\" weight=\".032\" \t\"Y_BOX_GAMES\" actualValue=\"0\" weight=\".027\" \t\"YRS_RESIDENCE\" actualValue=\"6\" weight=\".017\" \n100456\t3\t21\t\"AFFINITY_CARD\" actualValue=\"0\" weight=\".032\" \t\"Y_BOX_GAMES\" actualValue=\"0\" weight=\".027\" \t\"YRS_RESIDENCE\" actualValue=\"6\" weight=\".017\" 
\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":"{\"CUST_ID\":\"'100456'\"}","row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[{\"type\":\"Select\",\"name\":\"CUST_ID\",\"displayName\":null,\"defaultValue\":\"'101362'\",\"argument\":null,\"options\":[{\"value\":\"'101362'\",\"displayName\":null},{\"value\":\"'102087'\",\"displayName\":null},{\"value\":\"'100456'\",\"displayName\":null}],\"isHidden\":false,\"isProgrammatic\":false}]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","## End of Script"],"enabled":true,"result":{"startTime":1713901623797,"interpreter":"md.low","endTime":1713901623858,"results":[{"message":"<h2 id=\"end-of-script\">End of Script<\/h2>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":{"startTime":1713901623929,"interpreter":"md.low","endTime":1713901623989,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]